EG started this on 20160403 how does the noise effect the loglikelihood Weibull model->theta is a scale, gamma is a shape parameter >0 Gompertz Model-> beta(G) is a scale, alpha(R) is a shape parameter>0

difference function of loglikelihood function of gompertz and weibull p.d.fs test if L(Weibull,X)>L(Gompertz,X) for parameters Weibull model->theta is a scale, gamma is a shape parameter >0 Gompertz Model-> beta is a scale, alpha is a shape parameter>0

For additive Gaussian noise e ~ N (0, sigma^2) with known variance sigma^2 sd of gaussian noise function max sd would be = 3*mean(inverse.gomp.CDF) min sd would be mean(inverse.gomp.CDF)

require(flexsurv)
## Loading required package: flexsurv
## Loading required package: survival
require(gplots)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
gamma=0.01
theta=0.25
#test G and R in nested for loops
R=0.001
G=0.2
#R should be in [0, 0.05], and G should be [0.05, 0.3].  
N=2000 # population size

Introduce random Gompertz and Weibull functions with noise calculation func.

#rgompertz(alpha,beta,N) gives random Gompertz numbers from inverse CDF of Gompertz
#where alpha and beta are 2 parameters, N is number of population
#generate gompertz random numbers by using inverse CDF
#generate random number with a given distribution of Gompertz
#prediction
rgompertz = function(R,G, N){
  x.uniform = runif(N)
  #inverse of Gompertz CDF
  inverse.gomp.CDF = function(R,G,y) {  (1/G)*log(1 - (G/R)*log(1-y)  ) }
  x.gompertz = inverse.gomp.CDF(R,G, x.uniform)
  return(x.gompertz)
}
set.seed(123)
#generate gompertz random numbers (lifespan) 
#prediction
gompertz.random<-rgompertz(0.01,0.15,N)
average.lifespan=mean(gompertz.random)

#generate gaussion random numbers 
set.seed(123)
gaussian<-rnorm(N, mean = 2*average.lifespan, sd=0)


lifespan = gompertz.random+ gaussian/10

calculate.s = function( lifespan ){
  myData = sort( lifespan[!is.na(lifespan)] );
  tmpGC = table( myData )
  for( i in 2:length(tmpGC)) {
    tmpGC[i] = tmpGC[i-1] + tmpGC[i]        }    
  tot = length(myData)
  tmpGC = tmpGC / tot; 
  s = 1 - tmpGC
  #list( s=s, t=unique(my.data));
  ret = data.frame( cbind(s, unique(myData)));
  names(ret) = c("s", "t");
  ret;
}


GC = calculate.s(lifespan)
plot(GC$s ~ GC$t)

#3)calculate the mortality rates he mortality rate change over time, and then plot log(mortality rate) ~ time.
#Gompertz model should give a linear form.  
#For Weibull model, log(moretality rate) ~ log(time) give the linear form.

#HQin's calculate mortality rate function to calculate the rate of mortality over time
calculate.mortality.rate = function( lifespan ){
  GC = calculate.s(lifespan)
  GC$ds=NA; GC$dt=NA
  #first point
  GC$dt[1] = GC$t[2]
  GC$ds[1] = 1 - GC$s[1]
  GC$mortality.rate[1] = GC$ds[1] / GC$dt[1]
  
  for( j in 2:length(GC[,1])) {
    GC$ds[j] =  GC$s[j-1] - GC$s[j] 
    GC$dt[j] = -GC$t[j-1] + GC$t[j]
    GC$mortality.rate[j] = GC$ds[j] / ( GC$s[j] * GC$dt[j])
  }
  return(GC)
} #end of calculate.mortality.rate()

GC = calculate.mortality.rate(lifespan)



GC = calculate.s(round( lifespan, digits=1))
head(GC)
##          s   t
## 3.2 0.9980 3.2
## 3.4 0.9975 3.4
## 3.5 0.9960 3.5
## 3.6 0.9940 3.6
## 3.7 0.9925 3.7
## 3.8 0.9915 3.8
GC$ds=NA; GC$dt=NA
GC$dt[1] = GC$t[2] #20130321 correct a bug GC$s -> GC$t
GC$ds[1] = 1 - GC$s[1]
GC$mortality.rate[1] = GC$ds[1] / GC$dt[1]

for( j in 2:length(GC[,1])) {
  GC$ds[j] =  GC$s[j-1] - GC$s[j] 
  GC$dt[j] = -GC$t[j-1] + GC$t[j]
  GC$mortality.rate[j] = GC$ds[j] / ( GC$s[j] * GC$dt[j])
}
plot( GC$s ~ GC$t)

plot( GC$mortality.rate ~ GC$t, typ='l', log='y' )

#then plot log(mortality rate) ~ time.
#For Weibull model, log(moretality rate) ~ log(time) give the linear form.

#pdf(paste("plots/","Gompertz.semi.log.plot.batch.pdf", sep=''), width=5, height=5)
plot( log10(GC$mortality.rate) ~ GC$t, type='l') #linear for Gompertz, semi-log plot

#dev.off()

#pdf(paste("plots/","Weibull.log.log.plot.batch.pdf", sep=''), width=5, height=5)
plot( log10(GC$mortality.rate) ~ log10(GC$t), type='l'  ) #linear for Weibull, log-log plot

#dev.off()
rweibull= function(theta,gamma,N)
{
  x.uniform= runif(n)
  inverse.wei.CDF=function(theta,gamma,y) { theta*(-log(1-y))^(1/gamma)}
  x.weibull=inverse.wei.CDF(theta,gamma,x.uniform)
  return(x.weibull)
}

#create a function that calculates noise of lifespan
calculate.noise = function(i){
  
  #lifespan
  gaussian<-rnorm(N, mean = 0, sd=i)
  #observation
  #X<- gompertz.random +gaussian to be used in simulation later.
  #noise
  noise=sd(gaussian)
  
  return(noise)
}

initialize variables

#generate gompertz random numbers (lifespan) 
#prediction
gompertz.random<-rgompertz(R,G,N)
average.lifespan=mean(gompertz.random)

#put the variable values from loops into lists
sderr<- list()
Delta_LL<-list() 
G<-list()
R<-list()
LWei<-list()
LGomp<-list()
MeanLF<-list()
sdLS<-list()
Delta_LL.flex<-list()
LWei.flex<-list()
LGomp.flex<-list()
G.flex.estimated<-list()
R.flex.estimated<-list()
LLG.par<-list()
LLR.par<-list()
gamma.flex.estimated<-list()
theta.flex.estimated<-list()

simulate for parameters beta,alpha and noise=i to search effect of noise on delta likl

with change in parameters

 for (R_in in c(1E-3, 0.002,0.003, 0.005,0.008, 0.01,0.03, 0.05)){ #fix alpha or in other words R shape parameter
    for (G_in in c(0.05,0.08, 0.1,0.12,0.15,0.17, 0.2, 0.25)){
    #for (sd in seq(round.lifespan,3*round.lifespan,by=1)){
    for (i in c(0, 0.5, 1, 1.5, 2,3,4, 5)){ 
      
      #generate gompertz random numbers (lifespan) 
      #prediction
      set.seed(123)
      gompertz.random<-rgompertz(R_in,G_in,N)
      average.lifespan=mean(gompertz.random)
      
      #store average.lifespan into MeanLF list
      MeanLF[[length(MeanLF)+1]]=average.lifespan
      
      #check the sd by using calculate.noise() function
      sd.gaussian=calculate.noise(i)
      
      #generate gaussion random numbers 
      gaussian<-rnorm(N, mean = 2*average.lifespan, sd=i)
      
      #standard deviation of gompertz.random
      sd.lifespan=sd(gompertz.random)
      
      #store sd of lifespan into SdLS list
      sdLS[[length(sdLS)+1]] =sd.lifespan
      
      #add gaussian random numbers to gompertz random numbers
      lifespan<- gompertz.random +gaussian/10
      
      
      
      #Log likelihood function for the Weibull model
      
     weib.likl<-function(param,y){
        theta<- param[1] #take exponential to avoid NaNs when taking log(theta)
        gamma<- param[2] # avoid NaNs when taking log(gamma)
        delta=1; # delta is 1 for right censored data which is our case; lifespan>0
        y=lifespan[!is.na(lifespan)]
        
        logl<-sum(delta*(log(gamma) + gamma*log(theta) + (gamma-1)*log(y) -
                           (theta*y)^gamma )) -sum((1-delta)*(theta*y)^gamma)
        
        return(-logl)
      }
      # take log(param) since you take exponential above to avoid NaN values above
      weib=optim(c(0.03,0.001),weib.likl,y=lifespan)
      weib$value
      LWei[[length(LWei)+1]] = -weib$value
      
      
       gomp.likl <- function (param,y){
        
        G_in<-param[1]
        R_in<-param[2]
        delta=1
        y=lifespan[!is.na(lifespan)]
        logl<-sum(delta*(log(G_in)+R_in*y+(-(G_in/R_in)*(exp(R_in*y)-1)))) +
          sum((1-delta)*(-(G_in/R_in)*(exp(R_in*y)-1)))
        return(-logl)
      }
      gomp<-optim(c(G_in,R_in),gomp.likl,y=lifespan)
      #c(0.01,0.01) (best one)
      #c(0.025,0.03)
      #R should be in [0, 0.05], and G should be [0.05, 0.3].  
      
      gomp$value
      
      #store loglikelihood values of gompertz optimized results into LGomp list
      LGomp[[length(LGomp)+1]] = -gomp$value
      
      # store R and G estimation from optim of likl functions in Gompertz
      LLG.par[[length(LLG.par)+1]] =gomp$par[1]
      LLR.par[[length(LLR.par)+1]]=gomp$par[2]
      
      delta.likelihood.wei<- -weib$value-(-gomp$value)
      
      #calculate LL and noise change
      sderr[[length(sderr)+1]] = i       
      Delta_LL[[length(Delta_LL)+1]] = delta.likelihood.wei
      G[[length(G)+1]]=G_in
      #switch to alpha.seq when for fixed beta
      R[[length(R)+1]]=R_in
      
      #todo use flexsurv to calculate the LL
      
      #flexsurv only works with positive variables.
      #fix gaussian std to 0
      
      set.seed(123)
      gaussian.flex= rnorm(N, mean = 2*average.lifespan, sd=i)
      X.flex= gompertz.random +gaussian.flex/10
      
      
      fitGomp = flexsurvreg(formula = Surv(X.flex) ~ 1, dist="gompertz")
      fitWei = flexsurvreg(formula = Surv(X.flex) ~ 1, dist="weibull")
      
      
      LWei.flex[[length(LWei.flex)+1]]=fitWei$loglik
      
      LGomp.flex[[length(LGomp.flex)+1]]=fitGomp$loglik
      
      param.Gomp<-fitGomp$res; R.flex<-param.Gomp[2]; G.flex<-param.Gomp[1];
      
      R.flex.estimated[[length(R.flex.estimated)+1]]<-R.flex
      G.flex.estimated[[length(G.flex.estimated)+1]]<-G.flex
      
      param.Wei<-fitWei$res; gamma.flex<-param.Wei[1]; theta.flex<-param.Wei[2];
      
      gamma.flex.estimated[[length(gamma.flex.estimated)+1]]<-gamma.flex; 
      theta.flex.estimated[[length(theta.flex.estimated)+1]]<-theta.flex
      
      delta_flexsurv=fitWei$loglik-fitGomp$loglik 
      
      #fitWei$loglik
      
      Delta_LL.flex[[length(Delta_LL.flex)+1]]=delta_flexsurv
      
    }
  }
}
#put everything into a data frame
results = data.frame(cbind(sderr), cbind(R),cbind(LLR.par),cbind(R.flex.estimated),cbind(G),cbind(LLG.par),cbind(G.flex.estimated),cbind(Delta_LL) , cbind(Delta_LL.flex),
                     cbind(LWei),cbind(LWei.flex), cbind(LGomp),cbind(LGomp.flex), cbind(MeanLF), cbind(sdLS))



results_mat<-as.matrix(results)

write.csv(results_mat,file="Results.csv")

#write.csv(results_mat,file="noise_zero.csv")

dLL<-unlist(results$Delta_LL )
dLL.flex<-unlist(results$Delta_LL.flex)
LLGomp<-unlist(results$LGomp)
LLGomp.flex<- unlist(results$LGomp.flex)
LLWei<- unlist(results$LWei)
LLWei.flex<- unlist(results$LWei.flex)
simulated.G<-unlist(results$G)
estimated.G.flex<-unlist(results$G.flex.estimated)
simulated.R<-unlist(results$R)
estimated.R.flex<-unlist(results$R.flex.estimated)

estimatedLL.G<-unlist(results$LLG.par)
estimatedLL.R<-unlist(results$LLR.par)


#LLGomp.flex
#delta.output<-data.frame(dLL,dLL.flex,LLGomp,LLGomp.flex,LLWei,LLWei.flex)

summary( lm( dLL~ dLL.flex))
## 
## Call:
## lm(formula = dLL ~ dLL.flex)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77277 -0.13910 -0.02708  0.07267  1.10045 
## 
## Coefficients:
##             Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) 0.006877   0.010507    0.654    0.513    
## dLL.flex    0.995378   0.000248 4012.874   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2268 on 510 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.61e+07 on 1 and 510 DF,  p-value: < 2.2e-16
summary( lm( LLGomp ~ LLGomp.flex))
## 
## Call:
## lm(formula = LLGomp ~ LLGomp.flex)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5629 -0.4739  0.1786  0.6992  1.6613 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -3.689e+00  3.241e-01   -11.38   <2e-16 ***
## LLGomp.flex  9.996e-01  4.712e-05 21211.90   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9832 on 510 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 4.499e+08 on 1 and 510 DF,  p-value: < 2.2e-16
summary( lm( LLWei ~ LLWei.flex))
## 
## Call:
## lm(formula = LLWei ~ LLWei.flex)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8885 -0.4448  0.1393  0.5347  1.5214 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -3.278e+00  2.549e-01   -12.86   <2e-16 ***
## LLWei.flex   9.997e-01  3.699e-05 27028.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7769 on 510 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 7.305e+08 on 1 and 510 DF,  p-value: < 2.2e-16
summary(lm(simulated.G~estimated.G.flex))
## 
## Call:
## lm(formula = simulated.G ~ estimated.G.flex)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.016252 -0.001830  0.001983  0.004704  0.011033 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -0.0035869  0.0007586  -4.729 2.93e-06 ***
## estimated.G.flex  0.9644296  0.0046861 205.805  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006737 on 510 degrees of freedom
## Multiple R-squared:  0.9881, Adjusted R-squared:  0.9881 
## F-statistic: 4.236e+04 on 1 and 510 DF,  p-value: < 2.2e-16
summary(lm(simulated.R~estimated.R.flex))
## 
## Call:
## lm(formula = simulated.R ~ estimated.R.flex)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0034008 -0.0004474 -0.0000728  0.0004308  0.0022575 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      9.555e-04  4.724e-05   20.22   <2e-16 ***
## estimated.R.flex 1.615e+00  3.704e-03  436.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000843 on 510 degrees of freedom
## Multiple R-squared:  0.9973, Adjusted R-squared:  0.9973 
## F-statistic: 1.902e+05 on 1 and 510 DF,  p-value: < 2.2e-16
#pdf(paste("plots/","simulated.G.vs.estimated.G.flex.batch.pdf", sep=''), width=5, height=5)
plot(simulated.G~estimated.G.flex)

#dev.off()

#pdf(paste("plots/","simulated.R.vs.estimated.R.flex.batch.pdf", sep=''), width=5, height=5)
plot(simulated.R~estimated.R.flex)

#dev.off()

heatmap for fixed G

results.sub<-data.frame(cbind(sderr),cbind(R),cbind(G),cbind(Delta_LL))


R.els = unlist( unique(results.sub$R))
colnum = length(R.els)

tmp = unlist( unique(results.sub$sderr))
noise.els = tmp[order(tmp)]
rownum = length(noise.els)

mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = noise.els
colnames(mat) = R.els
for (k in c(0.05,0.08, 0.1,0.12,0.15,0.17, 0.2, 0.25)){
  
  data = results.sub[results.sub[,3]==k, 4]
  
  
  
  
  data<-unlist(data)
  
  heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
  
  #rownames(heat_mat, do.NULL = TRUE, prefix = "row")
  rownames(heat_mat) <- c("0","0.5","1","1.5","2","3","4","5")
  
  colnames(heat_mat) <- R.els
  library(gplots)
  hM <- format(round(heat_mat, 2))
  #data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
  
  
  #paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="") 
  #jpeg(paste("plots/",k, ".fixed.G.jpg", sep=''))
  
  #paste(“myplot_”, i, “.jpeg”, sep=””)
  
  heatmap.2(heat_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black",  margins=c(5,10),
            dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
            xlab     = "R parameters",
            ylab     = "noise", main = bquote(paste("R vs. sd dLL at" ~ G==.(k))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
  
  
  #dev.off()
  
}
## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

# heatmap for fixed noise

G.els = unlist( unique(results.sub$G))
colnum = length(G.els)

R.els=unlist(unique(results.sub$R))
rownum = length(R.els)

mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = R.els
colnames(mat) = G.els
for (n in c(0, 0.5, 1,1.5,2,3,4,5) ){
#for (n in c(0)){
  data = results.sub[results.sub[,1]==n, 4]
  
  
  
  
  data<-unlist(data)
  
  heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
  
  #rownames(heat_mat, do.NULL = TRUE, prefix = "row")
  rownames(heat_mat) <- R.els
  
  colnames(heat_mat) <- G.els
  library(gplots)
  hM <- format(round(heat_mat, 2))
  #data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
  
  
  #paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="") 
  #jpeg(paste("plots/",n, ".fixed_noise.jpg", sep=''))
  
  #paste(“myplot_”, i, “.jpeg”, sep=””)
  
  heatmap.2(heat_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black",  margins=c(5,10),
            dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
            xlab     = "R parameters",
            ylab     = "G parameters", main = bquote(paste("R vs G of dLL at" ~ sd==.(n))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
  
  
  #dev.off()
  
}
## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

# heatmap or fixed R

G.els = unlist( unique(results.sub$G))
colnum = length(G.els)

tmp = unlist( unique(results.sub$sderr))
noise.els = tmp[order(tmp)]
rownum = length(noise.els)

mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = noise.els
colnames(mat) = G.els
for (j in c(1E-3, 0.002,0.003, 0.005,0.008, 0.01,0.03, 0.05) ){
  
  

  data = results.sub[results.sub[,2]==j, 4]
  
  
  
  
  data<-unlist(data)
  
  heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
  
  #rownames(heat_mat, do.NULL = TRUE, prefix = "row")
  rownames(heat_mat) <- c("0","0.5","1","1.5","2","3","4","5")
  
  colnames(heat_mat) <- G.els
  library(gplots)
  hM <- format(round(heat_mat, 2))
  #data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
  
  
  #paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="") 
  #jpeg(paste("plots/",j, ".fixed_R.jpg", sep=''))
  
  #paste(“myplot_”, i, “.jpeg”, sep=””)
  
  heatmap.2(heat_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black",  margins=c(5,10),
            dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
            xlab     = "G parameters",
            ylab     = "noise", main = bquote(paste("G vs. sd of dLL at" ~ R==.(j))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
  
  
  #dev.off()
  
}
## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL